package WebPAC::Parser;

use warnings;
use strict;


use PPI;
use PPI::Dumper;
use Data::Dump qw/dump/;
use File::Slurp;

use base qw/WebPAC::Common/;

=head1 NAME

WebPAC::Parser - parse perl normalization configuration files (rules) and mungle it

=head1 VERSION

Version 0.08

=cut

our $VERSION = '0.08';

=head1 SYNOPSIS

This module will parse L<WebPAC::Normalize/lookup> directives and generate source
to produce lookups and normalization. It will also parse other parts of
source to produce some of DWIM (I<Do What I Mean>) magic
(like producing MARC oputput using L<WebPAC::Output::MARC> if there are C<marc_*>
rules in normalisation).

It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
reading about LISP. It might be a bit over-the board, but at least it removed
separate configuration files for lookups.

This is experimental code, but it replaces all older formats which where,
at one point in time, available in WebPAC.

FIXME

=head1 FUNCTIONS

=head2 new

Create new parser object.

  my $parser = new WebPAC::Parser(
  	config => new WebPAC::Config(),
	base_path => '/optional/path/to/conf',
  );

=cut

sub new {
	my $class = shift;
	my $self = {@_};
	bless($self, $class);

	my $log = $self->_get_logger();

	$log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));

	$log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));

	$self->_read_sources;

	$self ? return $self : return undef;
}

=head2 valid_database

  my $ok = $parse->valid_database('key');

=cut

sub valid_database {
	my $self = shift;

	my $database = shift || return;

	return defined($self->{valid_inputs}->{ _q($database) });
}

=head2 valid_database_input

  my $ok = $parse->valid_database('database_key','input_name');

=cut

sub valid_database_input {
	my $self = shift;
	my ($database,$input) = @_;
	$input = _input_name($input);
	return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
}

=head2 depends

Return all databases and inputs on which specified one depends

  $depends_on = $parser->depends('database','input');

=cut

sub depends {
	my $self = shift;
	my ($database,$input) = @_;
	$input = _input_name($input);
	$self->_get_logger->debug("depends($database,$input)");
	return unless (
		defined( $self->{depends}->{ _q($database) } ) &&
		defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
	);
	return $self->{depends}->{ _q($database) }->{ _q($input) };
}

=head2 have_lookup_create

  my @keys = $parser->have_lookup_create($database, $input);

=cut

sub have_lookup_create {
	my $self = shift;
	my ($database,$input) = @_;
	$input = _input_name($input);
	return unless (
		defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
		defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
	);
	return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
}


=head2 lookup_create_rules

  my $source = $parser->lookup_create_rules($database, $input);

=cut

sub lookup_create_rules {
	my $self = shift;
	my ($database,$input) = @_;
	$input = _input_name($input);
	return unless (
		defined( $self->{_lookup_create}->{ _q($database) } ) &&
		defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
	);
	return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
}

=head2 normalize_rules

  my $source = $parser->normalize_rules($database, $input);

=cut

sub normalize_rules {
	my $self = shift;
	my ($database,$input) = @_;
	$input = _input_name($input);
	return unless (
		defined( $self->{_normalize_source}->{ _q($database) } ) &&
		defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
	);
	return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
}


=head2 have_rules

  my $do_marc = $parser->have_rules('marc', $database, $input);
  my $do_index = $parser->have_rules('search', $database);

This function will return hash containing count of all found C<marc_*> or
C<search> directives. Input name is optional.

=cut

sub have_rules {
	my $self = shift;

	my $log = $self->_get_logger();
	my $type = shift @_ || $log->logconfess("need at least type");
	my $database = shift @_ || $log->logconfess("database is required");
	my $input = shift @_;

	$input = _input_name($input);


	return unless defined( $self->{_have_rules}->{ _q($database) } );

	my $database_rules = $self->{_have_rules}->{ _q($database ) };

	if (defined($input)) {

		return unless (
			defined( $database_rules->{ _q($input) } ) &&
			defined( $database_rules->{ _q($input) }->{ $type } )
		);

		return $database_rules->{ _q($input) }->{ $type };
	}

	my $usage;

	foreach my $i (keys %{ $database_rules }) {
		next unless defined( $database_rules->{$i}->{$type} );

		foreach my $t (keys %{ $database_rules->{ $i }->{$type} }) {
			$usage->{ $t } += $database_rules->{ $i }->{ $t };
		}
	}

	return $usage;

}


=head1 PRIVATE

=head2 _read_sources

  my $source_files = $parser->_read_sources;

Called by L</new>.

=cut

sub _read_sources {
	my $self = shift;

	my $log = $self->_get_logger();

	my $nr = 0;

	my @sources;

	$self->{config}->iterate_inputs( sub {
		my ($input, $database) = @_;

		$log->debug("database: $database input = ", dump($input));

		foreach my $normalize (@{ $input->{normalize} }) {

			my $path = $normalize->{path};
			return unless($path);
			my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;

			$log->logdie("normalization input $full doesn't exist") unless (-e $full);

			my $s = read_file( $full ) || $log->logdie("can't read $full: $!");

			my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));

			$log->debug("$database/$input_name: adding $path");

			$self->{valid_inputs}->{$database}->{$input_name}++;

			push @sources, sub {
				$self->_parse_source( $database, $input_name, $full, $s );
			};

			$nr++;
		}
	} );

	$log->debug("found $nr source files");

	# parse all sources
	$_->() foreach (@sources);

	return $nr;
}

=head2 _parse_source

  $parser->_parse_source($database,$input,$path,$source);

Called for each normalize source (rules) in each input by L</_read_sources>

It will report invalid databases and inputs in error log after parsing.

=cut

sub _parse_source {
	my $self = shift;
	my ($database, $input, $path, $source) = @_;

	$input = _input_name($input);

	my $log = $self->_get_logger();

	$log->logdie("invalid database $database" ) unless $self->valid_database( $database );
	$log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );

	$log->logdie("no source found for database $database input $input path $path") unless ($source);

	$log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");

	my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});

	$Document->prune('PPI::Token::Whitespace');
	$Document->prune('PPI::Token::Comment');
	#$Document->prune('PPI::Token::Operator');

	# Find all the named subroutines

	$self->{_lookup_errors} = ();

	sub _lookup_error {
		my $self = shift;
		my $msg = shift;
		$self->_get_logger->logconfess("error without message?") unless ($msg);
		push @{ $self->{_lookup_errors} }, $msg;
		return '';
	}

	$Document->find( sub {
			my ($Document,$Element) = @_;

			$Element->isa('PPI::Token::Word') or return '';
			$Element->content eq 'lookup' or return '';

			$log->debug("expansion: ", $Element->snext_sibling);

			my $args = $Element->snext_sibling;
		
			my @e = $args->child(0)->elements;
			$log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);

			if ($log->is_debug) {
				my $report = "found " . scalar @e . " elements:\n";

				foreach my $i ( 0 .. $#e ) {
					$report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
				}

				$log->debug($report);
			}

			my $key_element = $e[8]->clone;

			$log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');

			$log->debug("key part: ", $key_element);

			my @key;

			$key_element->find( sub {
				my $e = $_[1] || die "no element?";
				$e->isa('PPI::Token::Word') or return '';
				$e->content eq 'rec' or return '';

				my $kf = $e->snext_sibling;

				$log->debug("key fragment = $kf");

				push @key, eval $kf;
				$log->logdie("can't eval { $kf }: $@") if ($@);

				return 1;
			});

			my $key = join('-', @key ) || $log->logdie("no key found!");

			$log->debug("key = $key");

			return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
			return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );

			my $create = qq{
				save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
			};

			$log->debug("create: $create");

			# save code to create this lookup
			$self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
			$self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;


			if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
				$log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
			}

			# save this dependency
			$self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;

			if ($#e < 10) {
				$e[8]->insert_after( $e[8]->clone );
				$e[8]->insert_after( $e[7]->clone );
				$e[8]->insert_after( $e[6]->clone );
			}

			$e[7]->remove;
			$e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
			$e[8]->remove;


			$log->debug(">>> ", $Element->snext_sibling);
	});

	my $normalize_source = $Document->serialize;
	$log->debug("create: ", dump($self->{_lookup_create}) );
	$log->debug("normalize: $normalize_source");

	$self->{_normalize_source}->{$database}->{$input} .= $normalize_source;

	if ($self->{debug}) {
		my $Dumper = PPI::Dumper->new( $Document );
		$Dumper->print;
	}

	$log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});

	$Document->find( sub {
			my ($Document,$Element) = @_;

			$Element->isa('PPI::Token::Word') or return '';
			if ($Element->content =~ m/^(marc|search)/) {
				my $what = $1;
				$log->debug("found $what rules in $database/$input");
				$self->{_have_rules}->{ $database }->{ $input }->{ $what }->{ $Element->content }++;
			} else {
				return '';
			}
	});

	return 1;
}


=head2 _q

Strip single or double quotes around value

  _q(qq/'foo'/) -> foo

=cut

sub _q {
	my $v = shift || return;
	$v =~ s/^['"]*//g;
	$v =~ s/['"]*$//g;
	return $v;
}

=head2 _input_name

Return C<name> value if HASH or arg if scalar

  _input_name($input)

=cut

sub _input_name {
	my $input = shift || return;
	if (ref($input) eq 'HASH') {
		die "can't find 'name' value in ", dump($input) unless defined($input->{name});
		return $input->{name};
	} else {
		return $input;
	}
}


=head1 AUTHOR

Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>

=head1 COPYRIGHT & LICENSE

Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut

1; # End of WebPAC::Parser
